library(data.table)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(ggfortify)
library(scatterplot3d)
# Load the raw match list and the bookmaker odds history.
matches <- readRDS("C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_matches.rds")
odds <- readRDS("C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_odd_details.rds")

# Drop the columns that are not needed for the analysis.
matches[, c("type", "leagueId", "home", "away", "date") := NULL]

# Split the "home:away" score string into two numeric columns. Scores that are
# not numeric become NA here and are removed by complete.cases() below.
matches[, c("score_home", "score_away") := tstrsplit(score, ":")]
matches[, score_home := as.numeric(score_home)]
## Warning in eval(jsub, SDenv, parent.frame()): NAs introduced by coercion
matches[, score_away := as.numeric(score_away)]
matches[, total_score := score_home + score_away]

# Keep only matches with a complete, parsed score.
matches <- matches[complete.cases(matches)]

# Label the match outcome from the sign of the goal difference:
# -1 -> "Away", 0 -> "Draw", +1 -> "Home".
matches[, Result := c("Away", "Draw", "Home")[sign(score_home - score_away) + 2L]]
matches[, c("score_home", "score_away") := NULL]

# Over/under label on the 2.5 total-goals line.
matches[, ou := c("under", "over")[(total_score >= 2.5) + 1L]]
We order the odds by match, bookmaker, bet type, odd type and date, and keep the latest odd for each combination.
# Sort in place so that, within each match/bookmaker/bet type/odd type group,
# the rows are in ascending date order.
setorder(odds, matchId, bookmaker, betType, oddtype, date)

# Keep the last (latest-dated) row of every group.
odds2 <- odds[, tail(.SD, 1L), by = .(matchId, bookmaker, betType, oddtype)]

# One row per match, one column per betType_oddtype_bookmaker combination.
odds3 <- dcast(odds2, matchId ~ betType + oddtype + bookmaker, value.var = "odd")

# Number of non-missing values in every column of the wide table.
odds4 <- odds3[, lapply(.SD, function(column) sum(!is.na(column)))]
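We counted the non-missing values in each column and kept the 70 columns with the most observations (matchId plus 69 odds columns), so that as many matches as possible remain available for the analysis.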
# Keep the (up to) 70 columns with the most non-missing values. head() is used
# instead of [1:70] so that a table with fewer than 70 columns does not produce
# NA column names.
columnstokeep <- names(head(sort(unlist(odds4), decreasing = TRUE), 70L))
odds5 <- odds3[, columnstokeep, with = FALSE]

# Keep only the matches that have a value in every selected column.
odds6 <- odds5[complete.cases(odds5)]

# Attach the over/under label. matches can list the same matchId more than
# once, which would duplicate observations in the merged table, so the label
# table is reduced to one row per matchId first.
odds7 <- merge(
  odds6,
  unique(matches[, c("matchId", "ou")], by = "matchId"),
  by = "matchId"
)
odds7
## matchId 1x2_odd1_10Bet 1x2_odd1_12BET 1x2_odd1_188BET
## 1: 02oVDuv1 1.80 1.83 1.83
## 2: 04PCiQzK 1.46 1.47 1.50
## 3: 04vrPwsg 2.00 2.02 2.07
## 4: 04zko0D5 1.13 1.13 1.14
## 5: 04zko0D5 1.13 1.13 1.14
## ---
## 2721: zwD6OZpK 1.61 1.62 1.60
## 2722: zwD77hZh 1.77 1.87 1.86
## 2723: zwYQNFfa 2.08 2.17 2.17
## 2724: zy3x0uCs 1.21 1.22 1.23
## 2725: zylytbNg 2.15 2.20 2.19
## 1x2_odd1_Betclic 1x2_odd1_Betsafe 1x2_odd1_Betsson 1x2_odd1_SBOBET
## 1: 1.80 1.85 1.85 1.86
## 2: 1.50 1.47 1.53 1.47
## 3: 2.00 2.01 2.01 2.04
## 4: 1.14 1.15 1.15 1.14
## 5: 1.14 1.15 1.15 1.14
## ---
## 2721: 1.60 1.66 1.68 1.63
## 2722: 1.80 1.79 1.85 1.85
## 2723: 2.10 2.15 2.10 2.16
## 2724: 1.22 1.20 1.20 1.22
## 2725: 2.10 2.20 2.20 2.21
## 1x2_odd1_bet-at-home 1x2_odd1_bet365 1x2_odd2_10Bet 1x2_odd2_12BET
## 1: 1.78 1.85 4.85 4.70
## 2: 1.50 1.50 7.63 7.66
## 3: 1.96 2.05 4.05 4.00
## 4: 1.13 1.14 24.00 17.00
## 5: 1.13 1.14 24.00 17.00
## ---
## 2721: 1.60 1.65 5.45 5.40
## 2722: 1.80 1.85 4.65 4.22
## 2723: 2.00 2.10 3.78 3.64
## 2724: 1.20 1.22 15.96 13.97
## 2725: 2.10 2.14 3.40 3.25
## 1x2_odd2_188BET 1x2_odd2_Betclic 1x2_odd2_Betsafe 1x2_odd2_Betsson
## 1: 4.55 4.50 4.75 4.75
## 2: 8.10 6.50 7.60 6.65
## 3: 4.00 3.90 4.20 4.20
## 4: 21.00 19.50 20.00 20.00
## 5: 21.00 19.50 20.00 20.00
## ---
## 2721: 5.40 5.25 5.10 5.15
## 2722: 4.40 4.25 4.50 4.65
## 2723: 3.70 3.50 3.55 3.50
## 2724: 15.00 12.00 15.00 15.50
## 2725: 3.40 3.25 3.45 3.45
## 1x2_odd2_SBOBET 1x2_odd2_bet-at-home 1x2_odd2_bet365 1x2_oddX_10Bet
## 1: 4.7 4.58 4.75 3.55
## 2: 7.8 6.80 7.00 4.04
## 3: 3.8 4.02 4.10 3.30
## 4: 20.0 20.04 23.00 8.75
## 5: 20.0 20.04 23.00 8.75
## ---
## 2721: 5.2 5.19 5.50 4.20
## 2722: 4.4 4.30 4.59 3.65
## 2723: 3.6 3.55 3.60 3.23
## 2724: 16.5 14.50 15.00 6.22
## 2725: 3.3 3.30 3.50 3.55
## 1x2_oddX_12BET 1x2_oddX_188BET 1x2_oddX_Betclic 1x2_oddX_Betsafe
## 1: 3.45 3.65 3.40 3.70
## 2: 4.18 4.15 3.90 4.05
## 3: 3.28 3.25 3.25 3.25
## 4: 8.40 8.70 8.00 9.50
## 5: 8.40 8.70 8.00 9.50
## ---
## 2721: 4.05 4.30 4.00 4.25
## 2722: 3.56 3.60 3.50 3.60
## 2723: 3.19 3.30 3.20 3.20
## 2724: 6.30 6.50 5.75 6.25
## 2725: 3.50 3.45 3.50 3.60
## 1x2_oddX_Betsson 1x2_oddX_SBOBET 1x2_oddX_bet-at-home
## 1: 3.70 3.35 3.58
## 2: 3.85 4.09 3.70
## 3: 3.25 3.40 3.23
## 4: 9.50 8.25 7.96
## 5: 9.50 8.25 7.96
## ---
## 2721: 4.25 4.10 4.10
## 2722: 3.75 3.45 3.50
## 2723: 3.30 3.20 3.30
## 2724: 6.00 5.80 5.50
## 2725: 3.60 3.40 3.55
## 1x2_oddX_bet365 1x2_odd1_BetVictor 1x2_odd1_Pinnacle 1x2_odd1_Unibet
## 1: 3.60 1.83 1.83 1.85
## 2: 4.00 1.50 1.47 1.53
## 3: 3.40 1.95 2.06 2.00
## 4: 9.00 1.14 1.14 1.16
## 5: 9.00 1.14 1.14 1.16
## ---
## 2721: 4.20 1.65 1.64 1.64
## 2722: 3.75 1.85 1.88 1.80
## 2723: 3.30 2.15 2.18 2.15
## 2724: 6.00 1.22 1.23 1.22
## 2725: 3.60 2.20 2.21 2.19
## 1x2_odd2_BetVictor 1x2_odd2_Pinnacle 1x2_odd2_Unibet
## 1: 4.80 5.00 4.80
## 2: 7.50 8.50 6.75
## 3: 4.20 4.18 4.15
## 4: 26.00 23.00 23.00
## 5: 26.00 23.00 23.00
## ---
## 2721: 4.50 5.53 5.70
## 2722: 4.59 4.58 4.50
## 2723: 3.80 3.92 3.30
## 2724: 19.00 18.30 14.00
## 2725: 3.30 3.45 3.42
## 1x2_oddX_BetVictor 1x2_oddX_Pinnacle 1x2_oddX_Unibet
## 1: 3.70 3.63 3.65
## 2: 4.20 4.40 3.70
## 3: 3.40 3.38 3.35
## 4: 9.00 8.85 8.50
## 5: 9.00 8.85 8.50
## ---
## 2721: 4.33 4.37 4.20
## 2722: 3.75 3.73 3.60
## 2723: 3.25 3.26 3.25
## 2724: 6.50 6.70 5.50
## 2725: 3.70 3.59 3.62
## 1x2_odd1_Interwetten 1x2_odd1_William Hill 1x2_odd1_bwin
## 1: 1.90 1.83 1.83
## 2: 1.60 1.50 1.48
## 3: 2.00 1.95 2.00
## 4: 1.17 1.14 1.16
## 5: 1.17 1.14 1.16
## ---
## 2721: 1.70 1.65 1.62
## 2722: 1.85 1.85 1.83
## 2723: 2.10 2.04 2.00
## 2724: 1.25 1.22 1.20
## 2725: 2.20 2.15 2.10
## 1x2_odd2_Interwetten 1x2_odd2_William Hill 1x2_odd2_bwin
## 1: 4.1 4.80 4.60
## 2: 5.0 7.50 7.00
## 3: 3.6 4.33 4.10
## 4: 15.0 19.00 18.50
## 5: 15.0 19.00 18.50
## ---
## 2721: 5.0 5.00 5.00
## 2722: 4.0 4.33 4.20
## 2723: 3.2 3.75 3.65
## 2724: 10.0 15.00 15.00
## 2725: 3.4 3.40 3.50
## 1x2_oddX_Interwetten 1x2_oddX_William Hill 1x2_oddX_bwin
## 1: 3.50 3.50 3.50
## 2: 3.60 3.75 4.00
## 3: 3.30 3.10 3.30
## 4: 7.50 7.50 7.25
## 5: 7.50 7.50 7.25
## ---
## 2721: 3.70 4.00 4.33
## 2722: 3.45 3.50 3.50
## 2723: 3.20 3.25 3.25
## 2724: 5.00 5.50 5.50
## 2725: 3.30 3.50 3.50
## 1x2_odd1_Betway 1x2_odd2_Betway 1x2_oddX_Betway 1x2_odd1_888sport
## 1: 1.83 4.75 3.60 1.82
## 2: 1.50 6.75 3.85 1.57
## 3: 2.05 4.00 3.25 2.00
## 4: 1.15 23.00 8.50 1.15
## 5: 1.15 23.00 8.50 1.15
## ---
## 2721: 1.67 5.25 4.20 1.61
## 2722: 1.83 4.33 3.50 1.83
## 2723: 2.10 3.45 3.25 2.00
## 2724: 1.20 15.00 5.75 1.22
## 2725: 2.15 3.50 3.50 2.16
## 1x2_odd2_888sport 1x2_oddX_888sport 1x2_odd1_Sportingbet
## 1: 4.70 3.60 1.85
## 2: 6.50 4.00 1.50
## 3: 4.15 3.35 2.00
## 4: 19.00 8.50 1.15
## 5: 19.00 8.50 1.15
## ---
## 2721: 5.60 4.10 1.57
## 2722: 4.33 3.60 1.75
## 2723: 3.75 3.20 2.00
## 2724: 15.00 5.50 1.20
## 2725: 3.35 3.55 2.03
## 1x2_odd2_Sportingbet 1x2_oddX_Sportingbet 1x2_odd1_Paddy Power
## 1: 4.75 3.50 1.80
## 2: 6.50 3.60 1.57
## 3: 4.00 3.30 2.00
## 4: 15.00 7.00 1.12
## 5: 15.00 7.00 1.12
## ---
## 2721: 5.20 4.20 1.62
## 2722: 4.33 3.75 1.85
## 2723: 3.40 3.25 2.15
## 2724: 11.00 5.75 1.22
## 2725: 3.30 3.70 2.15
## 1x2_odd2_Paddy Power 1x2_oddX_Paddy Power 1x2_odd1_youwin
## 1: 4.5 3.70 1.78
## 2: 6.5 3.75 1.50
## 3: 4.0 3.30 1.97
## 4: 23.0 8.50 1.11
## 5: 23.0 8.50 1.11
## ---
## 2721: 5.0 4.33 1.57
## 2722: 4.2 3.60 1.83
## 2723: 3.5 3.25 2.20
## 2724: 13.0 6.00 1.24
## 2725: 3.4 3.50 1.95
## 1x2_odd2_youwin 1x2_oddX_youwin ha_1_bet365 ha_2_bet365 ah_1_10Bet
## 1: 4.50 3.40 1.30 3.39 2.09
## 2: 7.25 4.00 1.13 5.50 1.49
## 3: 4.07 3.26 1.44 2.63 1.32
## 4: 14.00 6.75 1.02 19.00 2.15
## 5: 14.00 6.75 1.02 19.00 2.15
## ---
## 2721: 5.20 4.20 1.25 3.75 1.78
## 2722: 4.50 3.50 1.33 3.25 1.88
## 2723: 4.00 3.10 1.50 2.50 1.49
## 2724: 18.50 6.10 1.05 11.00 1.62
## 2725: 3.20 3.60 1.53 2.37 1.59
## ah_1_12BET ah_1_188BET ah_1_Pinnacle ah_1_SBOBET ah_1_bet365
## 1: 1.84 1.83 1.32 1.86 1.24
## 2: 2.12 1.82 1.81 2.11 2.25
## 3: 2.04 2.44 1.76 2.05 1.25
## 4: 1.70 1.68 2.22 1.94 1.93
## 5: 1.70 1.68 2.22 1.94 1.93
## ---
## 2721: 2.11 1.61 1.43 1.79 1.77
## 2722: 2.20 1.89 1.64 1.86 1.30
## 2723: 1.85 1.84 1.86 1.83 1.20
## 2724: 1.89 1.69 1.88 1.89 1.65
## 2725: 1.64 1.91 1.91 1.93 1.57
## ah_2_10Bet ou
## 1: 1.78 under
## 2: 5.75 over
## 3: 3.14 under
## 4: 2.28 over
## 5: 2.28 over
## ---
## 2721: 2.09 under
## 2722: 2.00 over
## 2723: 2.54 over
## 2724: 10.75 under
## 2725: 2.42 under
# PCA on the odds columns only. The identifier and the label are excluded by
# name rather than by position, so the call stays correct if the number of
# selected odds columns changes. Columns are standardised before the PCA.
pcaodds <- prcomp(
  odds7[, setdiff(names(odds7), c("matchId", "ou")), with = FALSE],
  scale. = TRUE
)
summary(pcaodds)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 6.4227 4.4158 1.23886 1.07396 1.02223 0.98482
## Proportion of Variance 0.5978 0.2826 0.02224 0.01672 0.01514 0.01406
## Cumulative Proportion 0.5978 0.8804 0.90268 0.91940 0.93454 0.94860
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.94739 0.93171 0.68884 0.5633 0.35911 0.2880
## Proportion of Variance 0.01301 0.01258 0.00688 0.0046 0.00187 0.0012
## Cumulative Proportion 0.96161 0.97419 0.98106 0.9857 0.98753 0.9887
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 0.26816 0.22743 0.21007 0.19465 0.18837 0.17459
## Proportion of Variance 0.00104 0.00075 0.00064 0.00055 0.00051 0.00044
## Cumulative Proportion 0.98978 0.99053 0.99116 0.99171 0.99223 0.99267
## PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 0.17059 0.16249 0.15020 0.14589 0.1434 0.13887
## Proportion of Variance 0.00042 0.00038 0.00033 0.00031 0.0003 0.00028
## Cumulative Proportion 0.99309 0.99347 0.99380 0.99411 0.9944 0.99469
## PC25 PC26 PC27 PC28 PC29 PC30
## Standard deviation 0.13504 0.13341 0.13086 0.12806 0.12454 0.12318
## Proportion of Variance 0.00026 0.00026 0.00025 0.00024 0.00022 0.00022
## Cumulative Proportion 0.99495 0.99521 0.99546 0.99569 0.99592 0.99614
## PC31 PC32 PC33 PC34 PC35 PC36
## Standard deviation 0.11991 0.11481 0.11294 0.11161 0.10896 0.10823
## Proportion of Variance 0.00021 0.00019 0.00018 0.00018 0.00017 0.00017
## Cumulative Proportion 0.99635 0.99654 0.99672 0.99690 0.99708 0.99725
## PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 0.10551 0.10325 0.09894 0.09757 0.09660 0.09401
## Proportion of Variance 0.00016 0.00015 0.00014 0.00014 0.00014 0.00013
## Cumulative Proportion 0.99741 0.99756 0.99770 0.99784 0.99798 0.99811
## PC43 PC44 PC45 PC46 PC47 PC48
## Standard deviation 0.09026 0.08945 0.08684 0.08604 0.08577 0.08535
## Proportion of Variance 0.00012 0.00012 0.00011 0.00011 0.00011 0.00011
## Cumulative Proportion 0.99822 0.99834 0.99845 0.99856 0.99866 0.99877
## PC49 PC50 PC51 PC52 PC53 PC54
## Standard deviation 0.0822 0.07979 0.07707 0.07655 0.07455 0.07286
## Proportion of Variance 0.0001 0.00009 0.00009 0.00008 0.00008 0.00008
## Cumulative Proportion 0.9989 0.99896 0.99904 0.99913 0.99921 0.99929
## PC55 PC56 PC57 PC58 PC59 PC60
## Standard deviation 0.07205 0.06786 0.06609 0.06597 0.06483 0.06080
## Proportion of Variance 0.00008 0.00007 0.00006 0.00006 0.00006 0.00005
## Cumulative Proportion 0.99936 0.99943 0.99949 0.99956 0.99962 0.99967
## PC61 PC62 PC63 PC64 PC65 PC66
## Standard deviation 0.05841 0.05648 0.05417 0.05262 0.05151 0.04895
## Proportion of Variance 0.00005 0.00005 0.00004 0.00004 0.00004 0.00003
## Cumulative Proportion 0.99972 0.99977 0.99981 0.99985 0.99989 0.99992
## PC67 PC68 PC69
## Standard deviation 0.04826 0.04127 0.03752
## Proportion of Variance 0.00003 0.00002 0.00002
## Cumulative Proportion 0.99995 0.99998 1.00000
PC1 alone explains about 60% of the total variance, and the first three components together explain about 90%.
# 2D plot of the PCA result, with each match coloured by its over/under label.
autoplot(
  pcaodds,
  data = odds7,
  colour = "ou"
)
The 2D PCA plot is shown above; the colours indicate whether the match ended over or under. In the 2D plot the two groups overlap, so we cannot separate the matches using the bookmakers’ odds data. Let’s check the 3D plot.
# Principal-component scores (one row per match, one column per component)
# as a data.table, then printed.
pcadt <- as.data.table(pcaodds[["x"]])
print(pcadt)
## PC1 PC2 PC3 PC4 PC5 PC6
## 1: 0.1094996 -2.8165224 1.4104979 -0.9471604 0.03794329 0.67351289
## 2: -3.0756491 -2.7842941 -2.7050423 1.9059103 -0.01578599 -0.06709497
## 3: 1.3709472 -3.1377088 -0.9999828 2.5622927 0.27547972 0.40470371
## 4: -23.4735068 6.2043268 1.2517322 -0.8189955 -0.28934379 -0.86054643
## 5: -23.4735068 6.2043268 1.2517322 -0.8189955 -0.28934379 -0.86054643
## ---
## 2721: -1.8650193 -2.0208963 0.6246313 -0.7078497 0.51104710 0.23256529
## 2722: 0.2844541 -2.7909304 1.1473108 0.3462261 0.91200215 0.25469514
## 2723: 1.9530650 -3.0205871 -0.6600313 -0.2168889 0.48359437 -0.18563978
## 2724: -14.0294502 0.8648433 -6.9213802 1.5674308 0.66730178 -0.42089214
## 2725: 1.5368982 -2.3202307 -0.3591720 -0.3352899 -0.39602160 -0.21985557
## PC7 PC8 PC9 PC10 PC11
## 1: 0.4050462 -0.1640882 -0.47008849 0.1591410 0.08352374
## 2: -1.5277973 0.6692502 -2.23272954 0.2091531 -0.20469269
## 3: 0.9780660 -0.5705533 0.25449716 0.2998972 0.05624781
## 4: -0.6843605 -0.9763892 -0.88193349 1.5925573 1.24394682
## 5: -0.6843605 -0.9763892 -0.88193349 1.5925573 1.24394682
## ---
## 2721: -0.3548408 1.2288559 0.39704400 -0.9225916 -0.05511780
## 2722: 0.1244794 0.8342132 -0.01428751 -0.1508101 -0.03194472
## 2723: 0.1107918 -0.3243119 0.39394545 0.2595864 0.03477708
## 2724: -1.7992546 0.4402260 -6.85139209 0.7000027 -0.34053743
## 2725: 0.2184204 -1.1963544 0.38383578 -0.5797396 0.05441387
## PC12 PC13 PC14 PC15 PC16
## 1: -0.105944230 -0.07878307 0.03262987 0.055638097 -0.089186869
## 2: -0.371674642 -0.16428381 -0.03290172 0.006974182 -0.171403999
## 3: 0.123919495 -0.13023886 -0.00640845 -0.130194988 -0.078002677
## 4: -1.582977205 0.03691612 -0.29679335 -1.326196447 -1.201234950
## 5: -1.582977205 0.03691612 -0.29679335 -1.326196447 -1.201234950
## ---
## 2721: -0.105980828 0.07227395 0.17979632 0.127133984 0.092402608
## 2722: -0.076581278 -0.06158830 0.05854913 0.057466006 0.002226481
## 2723: -0.009962589 -0.05283796 0.08976455 0.047732949 -0.035387164
## 2724: -0.364378531 -0.53831587 -0.26774331 0.169065260 -0.189259445
## 2725: -0.010281369 -0.05356648 0.14196746 0.077412781 0.027627301
## PC17 PC18 PC19 PC20 PC21
## 1: -0.02228185 -0.08106838 -0.016813083 -0.11286747 -0.04048568
## 2: 0.03139231 -0.05078094 0.100955142 0.05054150 0.06150107
## 3: 0.02108452 0.06234882 0.046172801 0.00367468 0.06586037
## 4: 0.66555157 -0.83887361 -0.325946002 -0.36937499 -0.04130945
## 5: 0.66555157 -0.83887361 -0.325946002 -0.36937499 -0.04130945
## ---
## 2721: -0.17494004 -0.05725638 0.051424066 -0.27185719 -0.02227207
## 2722: -0.05059468 0.03584093 0.010040668 0.02115357 -0.04040637
## 2723: 0.01916138 -0.04733382 -0.057066165 -0.02229914 0.02270040
## 2724: -0.04570588 -0.51110377 0.085752886 -0.17574701 0.01131185
## 2725: -0.12437726 -0.01150243 0.006787161 0.01602313 0.03068628
## PC22 PC23 PC24 PC25 PC26
## 1: -0.017622918 -5.495537e-03 0.062707173 -0.006027917 0.0398904900
## 2: -0.033167493 -1.174176e-01 0.003727930 0.125360574 -0.0284400415
## 3: 0.001346656 7.461795e-04 -0.008629437 0.045853243 -0.0217761261
## 4: 0.221934753 3.422885e-01 -0.257023947 -1.136345344 0.1351373514
## 5: 0.221934753 3.422885e-01 -0.257023947 -1.136345344 0.1351373514
## ---
## 2721: 0.016699255 1.201862e-01 -0.029884486 0.022645722 0.0999479488
## 2722: -0.030207213 2.976507e-02 0.074096500 -0.116210905 0.0186459121
## 2723: -0.030834127 8.449004e-05 -0.010237785 0.043453028 0.0001355646
## 2724: -0.263305986 5.732183e-02 0.550386611 -0.019115079 -0.4583536189
## 2725: -0.070903552 4.080252e-02 -0.042191764 -0.123379819 -0.0310108344
## PC27 PC28 PC29 PC30 PC31
## 1: -0.062396837 -0.0023505638 0.01946260 0.114915343 -0.02439480
## 2: -0.083268438 -0.0320332030 -0.08106263 -0.076181714 -0.05498551
## 3: 0.054053562 -0.0002475425 -0.01594402 0.041036132 -0.03903440
## 4: 0.105569609 -0.7821914442 -0.23204365 -0.034693391 0.16884332
## 5: 0.105569609 -0.7821914442 -0.23204365 -0.034693391 0.16884332
## ---
## 2721: -0.132889329 0.0673962396 0.03613995 0.084690320 -0.06023499
## 2722: 0.028532078 -0.0080006360 0.02008222 0.061557493 0.01742559
## 2723: -0.034495103 -0.0136679246 0.07798585 -0.009192346 -0.03049386
## 2724: -0.450581883 -0.4275874489 -0.08344979 -0.300326244 0.14073870
## 2725: 0.007768031 0.0251134220 0.02864915 0.030301571 0.02777475
## PC32 PC33 PC34 PC35 PC36
## 1: -0.03274894 0.01832449 0.044599906 0.05625284 0.004519915
## 2: 0.14090464 0.14081884 -0.029565231 -0.09175212 -0.127747971
## 3: 0.05522705 -0.01238936 0.011275701 -0.01048005 0.037549144
## 4: -0.02056760 0.20025409 0.567384143 -0.07717095 0.446986229
## 5: -0.02056760 0.20025409 0.567384143 -0.07717095 0.446986229
## ---
## 2721: 0.02716941 -0.02396095 -0.124799007 0.02205239 -0.033668144
## 2722: -0.01740148 0.02932837 -0.001514987 0.01180951 0.037842167
## 2723: -0.03697881 0.02801642 -0.004845223 0.01235249 -0.004131060
## 2724: 0.14025518 -0.18075513 0.430064000 -0.02184590 -0.292874705
## 2725: -0.05362789 -0.10407587 0.019829875 -0.04062907 -0.033022883
## PC37 PC38 PC39 PC40 PC41
## 1: -0.032861441 -0.0633794242 -0.046149597 0.004373496 -0.01084611
## 2: 0.093311575 0.0586987109 0.060585055 0.040741226 0.00487624
## 3: 0.009803268 0.0415914842 -0.003756978 0.028446140 -0.05107400
## 4: 0.010713647 -0.0903579812 0.135497480 0.279439491 0.09845857
## 5: 0.010713647 -0.0903579812 0.135497480 0.279439491 0.09845857
## ---
## 2721: -0.001887168 0.0839098601 -0.049040368 -0.031217232 -0.02737934
## 2722: 0.009032791 -0.0252548455 -0.001330507 0.004645809 -0.02557730
## 2723: -0.027596199 0.0007757012 -0.067885502 -0.007742378 -0.05401345
## 2724: 0.044397564 -0.1994510730 -0.057322543 -0.200525509 0.05440102
## 2725: 0.016119360 0.0224151988 -0.045472052 -0.031434933 -0.04194091
## PC42 PC43 PC44 PC45 PC46
## 1: 0.019419812 0.02701403 0.04854765 0.0097897080 -0.02183368
## 2: -0.144963900 0.02761574 0.09863950 0.0345945977 -0.02805210
## 3: 0.008640447 0.07040731 -0.05450916 0.0002682971 -0.02230707
## 4: 0.176000461 -0.28563034 -0.15293696 -0.0320327421 0.09555602
## 5: 0.176000461 -0.28563034 -0.15293696 -0.0320327421 0.09555602
## ---
## 2721: -0.037711764 0.05366819 -0.02319373 -0.0163214126 -0.01254730
## 2722: -0.028639285 0.02857550 -0.02285538 0.0995242363 0.04191148
## 2723: 0.025982034 0.01832109 0.02192807 0.0333122803 0.01119037
## 2724: 0.240227804 0.42834916 -0.18371303 0.1811024281 0.16981106
## 2725: -0.017087349 0.02298037 0.02310519 0.0497932510 0.01753672
## PC47 PC48 PC49 PC50 PC51
## 1: 0.043875867 0.04769054 -0.010578056 0.016648463 -0.01748164
## 2: -0.031704868 0.12826316 0.020374216 -0.027582287 0.03078230
## 3: 0.005558739 -0.03561931 -0.006056237 -0.008866502 -0.02548964
## 4: -0.259737971 0.20362851 0.036605283 0.031639152 0.02676519
## 5: -0.259737971 0.20362851 0.036605283 0.031639152 0.02676519
## ---
## 2721: 0.046264158 0.00433486 -0.089630469 0.060309779 0.01433026
## 2722: -0.022235597 -0.02101647 0.027663604 0.026437528 -0.01142296
## 2723: -0.061279485 -0.02843915 0.040460833 0.013152514 -0.05636214
## 2724: -0.284481254 -0.19081632 0.726526213 0.283241624 0.16305475
## 2725: -0.016681815 -0.07872398 -0.002173336 -0.066971006 -0.02676406
## PC52 PC53 PC54 PC55 PC56
## 1: 0.007898021 0.003798567 -0.018835089 -0.04038545 -0.02417409
## 2: 0.053133637 -0.058248975 0.031563814 -0.01585609 -0.01524540
## 3: 0.032414163 -0.012974830 -0.038542471 0.02812098 -0.02276349
## 4: -0.036563346 -0.005938669 -0.034866692 -0.19871451 -0.15963418
## 5: -0.036563346 -0.005938669 -0.034866692 -0.19871451 -0.15963418
## ---
## 2721: 0.015565485 0.032823474 -0.001089686 0.01354880 -0.03492478
## 2722: 0.034072549 0.004888457 0.002736128 -0.01244107 -0.01702541
## 2723: -0.018657471 0.046753738 -0.083608151 -0.04744320 0.00388358
## 2724: 0.079605022 0.037671421 -0.076590224 -0.05069652 -0.02549552
## 2725: -0.021597273 0.012494105 0.026835068 0.05230312 -0.01057994
## PC57 PC58 PC59 PC60 PC61
## 1: 0.0025523519 -0.002271119 0.02101917 -0.0095324127 0.016084434
## 2: 0.0116598476 0.045321923 0.08317484 -0.0567077761 -0.040147849
## 3: 0.0175285306 0.022987256 0.01099193 0.0041480220 -0.019288320
## 4: -0.0198279019 0.124407525 0.10483475 -0.1083259612 0.033070146
## 5: -0.0198279019 0.124407525 0.10483475 -0.1083259612 0.033070146
## ---
## 2721: -0.0002601175 -0.041651550 0.02111144 -0.0007344976 0.006003927
## 2722: -0.0761466714 -0.043408222 0.03805816 0.0141113724 0.015765373
## 2723: -0.0509622818 -0.073337214 -0.02983894 0.0656498354 -0.007682648
## 2724: -0.2673811735 -0.185746997 -0.12715742 0.1214070184 0.004342155
## 2725: -0.0071283862 0.017229940 0.00749681 -0.0187720615 0.007692525
## PC62 PC63 PC64 PC65 PC66
## 1: -0.01027039 0.003020514 0.013088730 -0.005440970 0.005979006
## 2: -0.07320618 0.075317386 -0.081157709 -0.045682721 -0.120745954
## 3: 0.01633244 -0.003464083 -0.004143754 -0.008208025 -0.016563021
## 4: 0.03688770 -0.014569985 0.119909674 0.002295273 0.055647470
## 5: 0.03688770 -0.014569985 0.119909674 0.002295273 0.055647470
## ---
## 2721: -0.02140903 -0.022364872 -0.029971134 0.013904637 0.030952466
## 2722: 0.04303969 -0.007619967 0.043082323 -0.045104350 0.015473048
## 2723: 0.02087650 -0.021562033 0.063066865 -0.036176166 0.001664523
## 2724: 0.03704126 0.069442237 0.020716406 -0.010762858 0.117163527
## 2725: -0.01255886 0.008039082 0.003563335 -0.006034935 0.003615364
## PC67 PC68 PC69
## 1: -0.000532480 -6.551427e-03 0.008969380
## 2: -0.004875613 -2.769420e-02 0.007347256
## 3: -0.017157192 4.965918e-05 0.006353475
## 4: -0.036089225 1.302745e-02 -0.008037706
## 5: -0.036089225 1.302745e-02 -0.008037706
## ---
## 2721: -0.007693429 6.857856e-03 -0.005375186
## 2722: -0.016603208 -3.786412e-03 -0.025533248
## 2723: -0.054256696 -2.368965e-02 0.023516895
## 2724: 0.002280593 -1.728587e-02 -0.033818251
## 2725: -0.012812491 -2.046184e-03 0.002322718
# 3D scatter of the first three principal components, coloured by the
# over/under label taken from odds7 (same row order as pcadt).
p <- plot_ly(
  pcadt,
  x = ~PC1,
  y = ~PC2,
  z = ~PC3,
  color = ~odds7$ou,
  colors = c('#BF382A', '#0C4B8E')
)
p <- add_markers(p)
# plotly::layout is named explicitly because graphics::layout is masked.
p <- plotly::layout(
  p,
  scene = list(
    xaxis = list(title = 'PC1'),
    yaxis = list(title = 'PC2'),
    zaxis = list(title = 'PC3')
  )
)
p
In the 3D plot the over and under matches still overlap, so again we cannot separate them using the bookmakers’ odds data.
# Classical MDS on the Manhattan distances between the PCA score vectors.
# Distance matrix
manhattan <- dist(pcadt, method = "manhattan")
# Defensive: treat any undefined distance as 0 so cmdscale() does not fail.
manhattan[is.na(manhattan)] <- 0
# Scaling: project the distances onto two coordinates.
manhattan <- cmdscale(manhattan, k = 2)
manhattandt <- as.data.table(manhattan)
setnames(manhattandt, c("C1", "C2"))

# 2D scatter of the MDS coordinates, coloured by the over/under label.
# Axis titles for a 2D plot go directly in layout(); `scene` only applies to
# 3D plots. The axes are MDS coordinates, not principal components.
m <- plot_ly(manhattandt, x = ~C1, y = ~C2, color = ~odds7$ou,
             colors = c('#BF382A', '#0C4B8E')) %>%
  add_markers() %>%
  layout(xaxis = list(title = 'MDS dimension 1'),
         yaxis = list(title = 'MDS dimension 2'))
m
# Classical MDS on the Euclidean distances between the PCA score vectors.
# NOTE: classical MDS on Euclidean distances is equivalent to PCA, so this plot
# is expected to reproduce the PC1/PC2 picture up to the sign of the axes.
# Distance matrix
eu <- dist(pcadt)
# Defensive: treat any undefined distance as 0 so cmdscale() does not fail.
eu[is.na(eu)] <- 0
# Scaling: project the distances onto two coordinates.
eu <- cmdscale(eu, k = 2)
eudt <- as.data.table(eu)
setnames(eudt, c("C1", "C2"))

# 2D scatter of the MDS coordinates, coloured by the over/under label.
# Axis titles for a 2D plot go directly in layout(); `scene` only applies to
# 3D plots. The axes are MDS coordinates, not principal components.
e <- plot_ly(eudt, x = ~C1, y = ~C2, color = ~odds7$ou,
             colors = c('#BF382A', '#0C4B8E')) %>%
  add_markers() %>%
  layout(xaxis = list(title = 'MDS dimension 1'),
         yaxis = list(title = 'MDS dimension 2'))
e
The results of PCA and MDS are similar; we cannot see a meaningful difference between any of the plots.
require(data.table)
require(TunePareto)
## Loading required package: TunePareto
require(glmnet)
## Loading required package: glmnet
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-16
# Date boundaries: matches from trainStart up to (not including) testStart are
# used for training, matches from testStart onwards for testing.
trainStart <- as.Date("2012-07-15")
testStart <- as.Date("2018-11-16")

# Bookmaker odds with a missing ratio greater than this threshold are removed.
rem_miss_threshold <- 0.01

# Helper scripts; presumably these define the preprocessing, feature
# extraction, metric and training functions called below (TODO confirm).
source("C:\\Users\\a-adsene\\Desktop\\ETM58D\\data_preprocessing.r")
source("C:\\Users\\a-adsene\\Desktop\\ETM58D\\feature_extraction.r")
source("C:\\Users\\a-adsene\\Desktop\\ETM58D\\performance_metrics.r")
source("C:\\Users\\a-adsene\\Desktop\\ETM58D\\train_models.r")

# Read the raw match list and the raw odds history.
matches_raw <- readRDS("C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_matches.rds")
odd_details_raw <- readRDS("C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_odd_details.rds")

# Preprocess the matches (this replaces the `matches` table built earlier).
matches <- matches_data_preprocessing(matches_raw)
## Warning in strptime(Match_DateTime, "%Y-%m-%d %H:%M:%OS"): POSIXlt column
## type detected and converted to POSIXct. We do not recommend use of POSIXlt
## at all because it uses 40 bytes to store one date.
## Following postponed matches are KEPT during data_preprocessing:
## leagueId matchId home away score
## 1: df9b1196-e3cf-4cc7-9159-f236fe738215 EX7OmEj1 chelsea brighton POSTP.
## date type
## 1: 1551009600 soccer
# Preprocess the raw odds using the preprocessed matches.
odd_details <- details_data_preprocessing(odd_details_raw, matches)

# Extract opening and closing odds features from multiple bookmakers; the
# missing-ratio threshold and the train/test start dates are passed through.
features <- extract_features.openclose(
  matches,
  odd_details,
  pMissThreshold = rem_miss_threshold,
  trainStart,
  testStart
)
## Number of bookmakers with proportion of missings below 0.01 since 2012-07-15 : 12
## Number of bookmakers with no missings since testStart 2018-11-16 : 11
## Warning in `[.data.table`(matches, , -c("Home", "Away", "Home_Score",
## "Away_Score", : column(s) not removed because not found:
## Result_Home,Result_Tie,Result_Away
# Split the features by match date: [trainStart, testStart) is the training
# set, everything from testStart onwards is the test set.
test_features <- features[Match_Date >= testStart]
train_features <- features[Match_Date >= trainStart & Match_Date < testStart]
We set alpha to 0, which removes the lasso (L1) part of the elastic-net penalty, so the model is fitted with the ridge (L2) penalty only.
# Run glmnet on the training data, tuning lambda by cross-validated RPS
# (2 replications of 10 folds over 50 lambda values), and return test-set
# predictions for the lambda with the minimum RPS. The first five feature
# columns are excluded from the model inputs.
predictions <- train_glmnet(
  train_features,
  test_features,
  not_included_feature_indices = c(1:5),
  alpha = 0,
  nlambda = 50,
  tune_lambda = TRUE,
  nofReplications = 2,
  nFolds = 10,
  trace = TRUE
)
## Iteration 1: Fold 1 of Replication 1
## Iteration 2: Fold 2 of Replication 1
## Iteration 3: Fold 3 of Replication 1
## Iteration 4: Fold 4 of Replication 1
## Iteration 5: Fold 5 of Replication 1
## Iteration 6: Fold 6 of Replication 1
## Iteration 7: Fold 7 of Replication 1
## Iteration 8: Fold 8 of Replication 1
## Iteration 9: Fold 9 of Replication 1
## Iteration 10: Fold 10 of Replication 1
## Iteration 11: Fold 1 of Replication 2
## Iteration 12: Fold 2 of Replication 2
## Iteration 13: Fold 3 of Replication 2
## Iteration 14: Fold 4 of Replication 2
## Iteration 15: Fold 5 of Replication 2
## Iteration 16: Fold 6 of Replication 2
## Iteration 17: Fold 7 of Replication 2
## Iteration 18: Fold 8 of Replication 2
## Iteration 19: Fold 9 of Replication 2
## Iteration 20: Fold 10 of Replication 2
Here are the results:
# Show the test-set predictions together with the cross-validation statistics.
print(predictions)
## $predictions
## matchId Match_Result Over Under
## 1: 0AFLSIpA Under 0.04302878 0.95697122
## 2: 0Aeb7pV0 Over 0.96550069 0.03449931
## 3: 0QqMMPUm Over 0.96402193 0.03597807
## 4: 0Uu6adXM Under 0.03868408 0.96131592
## 5: 0pC7YMTa Over 0.96078852 0.03921148
## ---
## 156: zJAnjne2 Over 0.96498184 0.03501816
## 157: zNtirbpc Over 0.96139519 0.03860481
## 158: zROebAze Over 0.96161380 0.03838620
## 159: zar1vkdj Over 0.96321508 0.03678492
## 160: zeSaajYF Over 0.96250757 0.03749243
##
## $cv_stats
## $cv_stats$lambda.min
## [1] 0.0499506
##
## $cv_stats$lambda.1se
## [1] 0.0499506
##
## $cv_stats$meanRPS_min
## [1] 0.001646606
##
## $cv_stats$meanRPS_1se
## [1] 0.001646606